In [14]:
# %load kegg_parserer_ko2_KO_counts.py
#Parses the KEG htext file format

# from argparse import ArgumentParser
import re
import sys
import os
import csv
import pandas as pd
import numpy as np

In [65]:
ko2Tab=pd.read_table('ko00002_table.tab', index_col='KeggID')
All_KO_Counts=pd.read_table('PhytoAll_KO_Counts.tab', index_col='gID')

In [41]:



Out[41]:
S1      0.534060
S2      0.564492
S3      0.767502
S4      0.772130
S5      0.870291
mean    0.840329
dtype: float64

In [66]:
from matplotlib import pyplot as plt
from matplotlib import gridspec
from itertools import combinations
import palettable.colorbrewer as b2m
import palettable as pal
import glob
from Bio import SeqIO
from string import ascii_lowercase, ascii_uppercase
from matplotlib.colors import LogNorm, NoNorm
%matplotlib inline
def HeatMap(All_insitu_Percent, columns=None,colormap=b2m.sequential.YlOrRd_9.get_mpl_colormap(), m=1e-5):
    if columns:
        All_insitu_Percent=All_insitu_Percent[columns]
    # sckos=['SS1','SS2','SS3', 'SS4', 'SS5']
    All_insitu_Percent['mean']=All_insitu_Percent.mean(skipna=1, axis=1) #calculate mean value for each class

    All_insitu_Percent=All_insitu_Percent.sort(columns='mean', ascending=False)#Sort by the mean value

    All_insitu_Percent=All_insitu_Percent.drop('mean',1) #drop mean column
    All_insitu_Percent=All_insitu_Percent.loc[All_insitu_Percent.sum(axis=1)!=0]

    col_labels=list(All_insitu_Percent.index)
    row_labels=list(All_insitu_Percent.columns.values)
    fig3,ax3=plt.subplots()
    fig3.set_figheight(len(col_labels)/300)
    fig3.set_figwidth(len(row_labels))
    heatmap3 = ax3.pcolor(All_insitu_Percent, cmap=colormap, norm=LogNorm(vmin=m, vmax=All_insitu_Percent.max().max()))
    #heatmap3 = ax3.pcolor(All_insitu_Percent, cmap=plt.cm.jet, vmin=0, vmax=.25)
    ax3.set_xticks(np.arange(All_insitu_Percent.shape[1])+0.5, minor=False)
    ax3.set_yticks(np.arange(All_insitu_Percent.shape[0])+0.5, minor=False)
    ax3.invert_yaxis()
    ax3.xaxis.tick_top()
    ax3.margins(0,0)

    ax3.set_xticklabels(row_labels, minor=False)
    ax3.set_yticklabels(col_labels, minor=False)
    plt.colorbar(heatmap3)
    plt.show()
    return fig3

In [77]:
HeatMap(All_KO_Counts)


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-77-ba2e6b89481b> in <module>()
----> 1 HeatMap(All_KO_Counts)

<ipython-input-66-f0a339e855f0> in HeatMap(All_insitu_Percent, columns, colormap, m)
     28     #heatmap3 = ax3.pcolor(All_insitu_Percent, cmap=plt.cm.jet, vmin=0, vmax=.25)
     29     ax3.set_xticks(np.arange(All_insitu_Percent.shape[1])+0.5, minor=False)
---> 30     ax3.set_yticks(np.arange(All_insitu_Percent.shape[0])+0.5, minor=False)
     31     ax3.invert_yaxis()
     32     ax3.xaxis.tick_top()

/Users/harrietalexander/anaconda/lib/python2.7/site-packages/matplotlib/axes/_base.pyc in set_yticks(self, ticks, minor)
   2871             Sets the minor ticks if *True*
   2872         """
-> 2873         return self.yaxis.set_ticks(ticks, minor=minor)
   2874 
   2875     def get_ymajorticklabels(self):

/Users/harrietalexander/anaconda/lib/python2.7/site-packages/matplotlib/axis.pyc in set_ticks(self, ticks, minor)
   1591         else:
   1592             self.set_major_locator(mticker.FixedLocator(ticks))
-> 1593             return self.get_major_ticks(len(ticks))
   1594 
   1595     def _update_label_position(self, bboxes, bboxes2):

/Users/harrietalexander/anaconda/lib/python2.7/site-packages/matplotlib/axis.pyc in get_major_ticks(self, numticks)
   1299             # update the new tick label properties from the old
   1300             for i in range(numticks - len(self.majorTicks)):
-> 1301                 tick = self._get_tick(major=True)
   1302                 self.majorTicks.append(tick)
   1303 

/Users/harrietalexander/anaconda/lib/python2.7/site-packages/matplotlib/axis.pyc in _get_tick(self, major)
   1962         else:
   1963             tick_kw = self._minor_tick_kw
-> 1964         return YTick(self.axes, 0, '', major=major, **tick_kw)
   1965 
   1966     def _get_label(self):

/Users/harrietalexander/anaconda/lib/python2.7/site-packages/matplotlib/axis.pyc in __init__(self, axes, loc, label, size, width, color, tickdir, pad, labelsize, labelcolor, zorder, gridOn, tick1On, tick2On, label1On, label2On, major)
    148         self.tick1line = self._get_tick1line()
    149         self.tick2line = self._get_tick2line()
--> 150         self.gridline = self._get_gridline()
    151 
    152         self.label1 = self._get_text1()

/Users/harrietalexander/anaconda/lib/python2.7/site-packages/matplotlib/axis.pyc in _get_gridline(self)
    565                     linewidth=rcParams['grid.linewidth'],
    566                     alpha=rcParams['grid.alpha'],
--> 567                     markersize=0
    568                     )
    569 

/Users/harrietalexander/anaconda/lib/python2.7/site-packages/matplotlib/lines.pyc in __init__(self, xdata, ydata, linewidth, linestyle, color, marker, markersize, markeredgewidth, markeredgecolor, markerfacecolor, markerfacecoloralt, fillstyle, antialiased, dash_capstyle, solid_capstyle, dash_joinstyle, solid_joinstyle, pickradius, drawstyle, markevery, **kwargs)
    320         self.set_solid_joinstyle(solid_joinstyle)
    321 
--> 322         self.set_linestyle(linestyle)
    323         self.set_drawstyle(drawstyle)
    324         self.set_linewidth(linewidth)

/Users/harrietalexander/anaconda/lib/python2.7/site-packages/matplotlib/lines.pyc in set_linestyle(self, linestyle)
    952 
    953         for ds in self.drawStyleKeys:  # long names are first in the list
--> 954             if linestyle.startswith(ds):
    955                 self.set_drawstyle(ds)
    956                 if len(linestyle) > len(ds):

KeyboardInterrupt: 

In [76]:
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as dist
import matplotlib.pylab as pylab

inDF=All_KO_Counts
distMatrix=dist.pdist(inDF.T, 'Correlation')
distSqMatrix = dist.squareform(distMatrix)
linkageMatrix=sch.linkage(distSqMatrix) 
dendro = sch.dendrogram(linkageMatrix)
leaves = dendro['leaves'] 
dendro = sch.dendrogram(linkageMatrix, labels=inDF.columns[leaves])


Genes associated with C00353:

  • K10960
  • K02291 phytoene synthase [EC:2.5.1.32] : in index-- similar pattern?

Not annotated in the transcriptome:

  • K17841 15-cis-phytoene synthase / lycopene beta-cyclase [EC:2.5.1.32 5.5.1.19]

In [108]:
def plotKOByTime(kid):
    if kid in All_KO_Counts.index:
        y=All_KO_Counts.loc[kid]
        plt.plot(np.arange(5),y[0:5])
    else:
        print kid + ' is not in the index'

In [109]:
plotKOByTime('K02291')
plotKOByTime('K17841')


K17841 is not in the index

In [96]:
kid='K02291'
All_KO_Counts.loc[kid]


Out[96]:
S1      43.579303
S2      44.669163
S3      18.538132
S4      24.974415
S5      12.942789
mean    28.940760
Name: K02291, dtype: float64

In [ ]: